/*==============================================================================
FILE NAME: Create_Air_Panel.do
PURPOSE: This .do file creates Air_Panel dataset used in analysis.
INPUTS: NSR_Permits_Clean.dta, TitleV_Permits_Clean.dta, facility_characteristics.dta,
Investigations_Clean.dta, IN_vio_cat.dta, Notice_of_Violation_Clean.dta Enforcements_Clean.dta, incidents.dta, industry_air_complaints.dta, incidents_clean.dta,
county complaints.dta, county air complaints.dta, region complaints.dta, region air complaints.dta, area complaints.dta, area air complaints.dta, Emissions_events.dta
OUTPUTS: Panel_inv_cat.dta, SIC_2digit`X'_panel.dta, Air_Panel.dta
CREATED: 9 August 2024
UPDATED: 9 July 2025
==============================================================================*/


clear all

/* Set directory if working independently through code
if c(username)=="" { //insert username
	global rootdir "" // insert root path
	global processed_data "$rootdir/processed_data"  // Define global paths for replication package
} 
*/

set more off
//1. create a file that includes the RNs of facilities that have a Title V, NSR or both permits
//panel will focus on RNs that have a Title V permit, NSR permit or both
//assumption is that PNs uniquely identify a permit
use "$processed_data/NSR_Permits_Clean.dta", clear
gen RN_id=substr(RN,3,.)
label var RN_id "same as RN without 'RN'"
destring RN_id, replace
keep RN_id NSR
count if RN_id==.
drop if RN_id==.
sort RN_id
by RN_id: gen nobs=_N
tab nobs
duplicates drop
drop nobs
sort RN_id
unique RN_id
save "$processed_data/NSR_temp.dta", replace

use "$processed_data/TitleV_Permits_Clean.dta", clear
gen RN_id=substr(RN,3,.)
label var RN_id "same as RN without 'RN'"
destring RN_id, replace
keep RN_id TitleV
count if RN_id==.
drop if RN_id==.
sort RN_id
by RN_id: gen nobs=_N
tab nobs
duplicates drop
drop nobs
sort RN_id
unique RN_id
save "$processed_data/TitleV_temp.dta", replace


use "$processed_data/facility_characteristics.dta", clear
//The variables County and Q are identical and all non-missing.
rename RegulatedEntityNo RN
gen RN_id=substr(RN,3,.)
label var RN_id "same as RN without 'RN'"
destring RN_id, replace
keep RN_id County TCEQRegion
sort TCEQRegion
egen region_id=group(TCEQRegion)
label var region_id "numeric identifier for TCEQRegion"
gen area_id=.
replace area_id=1 if region_id==6 | region_id==7 | region_id==15 | region_id==16
replace area_id=2 if region_id==9 | region_id==11 | region_id==13
replace area_id=3 if region_id==5 | region_id==10 | region_id==12 | region_id==14
replace area_id=4 if region_id==1 | region_id==2 | region_id==3 | region_id==4 | region_id==8
label define area 1 "Border & Permian Basin" 2 "Central" 3 "Coastal & East" 4 "North Central & West"
label values area_id area
label var area_id "TCEQ Area"
drop TCEQRegion
duplicates drop
//RN uniquely identifies obs at this point
sort RN_id
save "$processed_data/facilities_temp.dta", replace

//how many RNs have NSR &/or Title V permits?
use "$processed_data/TitleV_temp.dta", clear
append using "$processed_data/NSR_temp.dta"
replace NSR=0 if NSR==.
replace TitleV=0 if TitleV==.
sort RN_id
collapse (sum) NSR TitleV, by(RN_id)
tab NSR TitleV, row column
//only 4% of RNs that have an NSR permit also have a Title V permit
//almost 96% of RNs that have a Title V permit also have an NSR permit
sort RN_id
save "$processed_data/TitleV_NSR_temp.dta", replace
//this file contains the RNs of facilities that show up in the Title V &/or NSR data
//this file will form the cross-sectional basis of the panel

erase "$processed_data/TitleV_temp.dta"
erase "$processed_data/NSR_temp.dta"

//2. merge with facilities data to get county, region, and area
//explore match between RNs with NSR &/or Title V permits and facility characteristics file
use "$processed_data/TitleV_NSR_temp.dta", clear
sort RN_id
merge 1:1 RN_id using "$processed_data/facilities_temp.dta"
tab _merge NSR, row column
//96% of RNs with NSR permit find a match in facilities data
tab _merge TitleV, row column
//99.5% of RNs with Title V permit find a match in facilities data
drop if _merge==2
drop _merge
sort RN_id
save "$processed_data/TitleV_NSR_temp.dta", replace

//3. set up scaffolding for RN-by-month panel for RNs with TitleV/NSR permits; 204 months between Jan2003 & Dec2019

use "$processed_data/TitleV_NSR_temp.dta", clear
keep RN_id
duplicates drop
sort RN_id
unique RN_id
//59556 unique RNs show up in TitleV/NSR permits data
//create RN-by-month panel structure for these RNs
gen year=2003
gen month=1
gen mdate=ym(year,month)
format mdate %tm
drop year month
//need to create numeric identifier for RN so we can xtset the data
//drop "RN" and destring
isid RN_id
//build scaffolding
xtset RN_id mdate
tsappend, add(203)
gen date=dofm(mdate)
gen month=month(date)
gen year=year(date)
order year, after(mdate)
order month, after(year)
drop date
sort RN_id
sort RN_id mdate
merge m:1 RN_id using "$processed_data/TitleV_NSR_temp.dta"
drop _merge
order County, after(month)
save "$processed_data/Panel_scaffold.dta", replace

//4. merge in investigations
//separate by complaint/noncomplaint investigation first
use "$processed_data/Investigations_Clean.dta", clear
gen RN_id=substr(RN,3,.)
label var RN_id "same as RN without 'RN'"
destring RN_id, replace
destring IN, replace
sort IN
save "$processed_data/inv_temp.dta", replace

use "$processed_data/investigations_with_noe_nov.dta", clear
drop year
sort IN
merge 1:m IN using "$processed_data/inv_temp.dta"
keep if _merge==3
//this will exclude 959 investigations that did not merge successfully with the NOV or NOE files
drop _merge
gen mdate=ym(year,month)
//focus on air investigations
keep if Media=="AIR"
//all but 9 investigations with missing values for Media are before 2003
rename investigation air_investigation
keep IN complaint_inv air_investigation RN_id mdate day
duplicates drop
//at this point IN uniquely identifies obs
sort RN_id mdate day
//next step will give us # of air_investigations by RN-day
collapse (sum) air_investigation complaint_inv, by(RN_id mdate day)
//now create days investigated
sort RN_id mdate
gen day_air_inv=0
replace day_air_inv=1 if air_investigation>0
gen day_air_complaint_inv=0
replace day_air_complaint_inv=1 if complaint_inv>0
gen day_air_nocomplaint_inv=0
replace day_air_nocomplaint_inv=1 if air_investigation>complaint_inv
keep RN_id mdate day_air_inv day_air_complaint_inv day_air_nocomplaint_inv
sort RN_id mdate 
collapse (sum) day_air_inv day_air_complaint_inv day_air_nocomplaint_inv, by(RN_id mdate)
label var day_air_inv "# days RN had >=1 air investigation in month"
label var day_air_complaint_inv "# days RN had >=1 complaint air investigation in month"
label var day_air_nocomplaint_inv "# days RN had >=1 non-complaint air investigation in month"
sort RN_id mdate
//restrict to 2003 to 2019
format mdate %tm
keep if mdate>=tm(2003m1) & mdate<=tm(2019m12)
merge 1:1 RN_id mdate using "$processed_data/Panel_scaffold.dta"
drop if _merge==1
drop _merge
foreach x of varlist day_air_inv-day_air_nocomplaint_inv{
replace `x'=0 if `x'==.
}
sort RN_id mdate
gen p_air_inv=(day_air_inv>0)
label var p_air_inv "=1 if RN had at least one air investigation in month"
gen p_air_complaint_inv=(day_air_complaint_inv>0)
label var p_air_complaint_inv "=1 if RN had at least one complaint-related air investigation in month"
gen p_air_nocomplaint_inv=(day_air_nocomplaint_inv>0)
label var p_air_nocomplaint_inv "=1 if RN had at least one non-complaint-related air investigation in month"
save "$processed_data/Panel_inv.dta", replace

//4b. merge in violations by category
//separate by complaint/noncomplaint investigation first

use "$processed_data/Investigations_Clean.dta", clear
gen RN_id=substr(RN,3,.)
label var RN_id "same as RN without 'RN'"
destring RN_id, replace
destring IN, replace
sort IN
save "$processed_data/inv_temp.dta", replace

use "$processed_data/IN_vio_cat.dta", clear
drop year
sort IN
merge 1:m IN using "$processed_data/inv_temp.dta"
keep if _merge==3
drop _merge
gen mdate=ym(year,month)
//focus on air investigations
keep if Media=="AIR"
//less than 1% of investigations after 2003 have missing values for Media
keep IN complaint_inv RN_id mdate catA catB catC any_cat
duplicates drop
//at this point IN uniquely identifies obs
sort RN_id mdate
//next step will give us # of cat A, cat B, and cat C violations by RN-mdate
collapse (sum) catA catB catC any_cat, by(RN_id mdate)
rename catA air_catA
rename catB air_catB
rename catC air_catC
rename any_cat air_any_cat

label var air_catA "# of category A violations for RN in the month"
label var air_catB "# of category B violations for RN in the month"
label var air_catC "# of category C violations for RN in the month"
label var air_any_cat "# of violations for RN in the month"
gen p_air_catA=(air_catA>0)
label var p_air_catA "=1 if RN had at least one category A violation in the month"
gen p_air_catB=(air_catB>0)
label var p_air_catB "=1 if RN had at least one category B violation in the month"
gen p_air_catC=(air_catC>0)
label var p_air_catC "=1 if RN had at least one category C violation in the month"
gen p_air_any_cat=(air_any_cat>0)
label var p_air_any_cat "=1 if RN had at least one violation in the month"
//restrict to 2003 to 2019
format mdate %tm
keep if mdate>=tm(2003m1) & mdate<=tm(2019m12)
merge 1:1 RN_id mdate using "$processed_data/Panel_scaffold.dta"
drop if _merge==1
drop _merge
foreach x of varlist air_catA-p_air_any_cat{
replace `x'=0 if `x'==.
}
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/Panel_inv.dta"
drop _merge
save "$processed_data/Panel_inv_cat.dta", replace

//5. merge in novs
use "$processed_data/Notice_of_Violation_Clean.dta", clear
gen RN_id=substr(RN,3,.)
label var RN_id "same as RN without 'RN'"
destring RN_id, replace
//focus on air NOVs
keep if violation_air==1
//all but 4 observations that have missing violation_air occur before 2003
keep RN_id VN day month year
gen mdate=ym(year,month)
format mdate %tm
duplicates drop
//the same RN VN appears on different dates
//create a count of unique VNs for each RN-month
keep RN_id VN mdate
duplicates drop
sort RN_id mdate
collapse (count) VN, by(RN_id mdate)
rename VN air_novs
label var air_novs "number of air NOVs received by RN in month"
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/Panel_inv_cat.dta"
unique RN_id if _merge==3
drop if _merge==1
drop _merge
replace air_novs=0 if air_novs==.
sort RN_id mdate
gen p_air_nov=(air_novs>0)
label var p_air_nov "=1 if RN had at least one NOV in month"
save "$processed_data/Panel_inv_nov.dta", replace

//6. merge in noes
use "$processed_data/Enforcements_Clean.dta", clear
gen RN_id=substr(RN,3,.)
label var RN_id "same as RN without 'RN'"
destring RN_id, replace
keep RN_id VN day month year enforcement_air
gen mdate=ym(year,month)
format mdate %tm
//focus on air NOEs
keep if enforcement_air==1
//of 511 observations with missing information on media (about 3%), 400 occur after 2003
//the same RN VN appears on different dates
//create a count of unique VNs for each RN-month
keep RN_id VN mdate
duplicates drop
sort RN_id mdate
collapse (count) VN, by(RN_id mdate)
rename VN air_noes
label var air_noes "number of air NOEs received by RN in month"
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/Panel_inv_nov.dta"
unique RN_id if _merge==3
drop if _merge==1
drop _merge
replace air_noes=0 if air_noes==.
sort RN_id mdate
gen p_air_noe=(air_noes>0)
label var p_air_noe "=1 if RN had at least one air NOE in month"
save "$processed_data/Panel_inv_nov_noe.dta", replace

//7. merge in complaints
//now create count of complaints in month
use "$processed_data/incidents.dta", clear
drop if IncidentStatus=="REFERRED"
gen RN_id=substr(RegulatedEntity,3,.)
label var RN_id "same as RN without 'RN'"
destring RN_id, replace
rename ComplaintIncident CIN
gen temp = date(IncidentRecDate,"MDY")
drop IncidentRecDate 
rename temp IncidentRecDate
format IncidentRecDate %td
replace IncidentRecDate = . if IncidentRecDate < 0
gen year=year(IncidentRecDate)
gen month=month(IncidentRecDate)
gen mdate=ym(year,month)
format mdate %tm
//restrict to 2003 to 2019
keep if mdate>=tm(2003m1) & mdate<=tm(2019m12)
gen incident=1
keep CIN RN_id mdate incident
duplicates drop
isid CIN
//at this point CIN uniquely identifies observations
sort RN_id mdate
//this will give us # of complaints in the month
collapse (sum) incident, by(RN_id mdate)
label var incident "# of complaint incidents against RN in month"
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/Panel_inv_nov_noe.dta"
unique RN_id if _merge==3
drop if _merge==1
drop _merge
replace incident=0 if incident==.
gen p_incident=(incident>0)
label var p_incident "=1 if RN had at least one complaint incident in month"
sort RN_id mdate
save "$processed_data/Panel_inv_nov_noe_inc.dta", replace

//now create count of air complaints in month
//first merge in data on incident effect and nature
use "$processed_data/incidents_clean.dta", clear
gen RN_id=substr(RN,3,.)
label var RN_id "same as RN without 'RN'"
destring RN_id, replace
drop RN
keep RN_id CIN incident_air IncidentRecDate IncidentStatus year month IncidentNature IncidentEffect
duplicates drop
gen mdate=ym(year,month)
format mdate %tm
gen ODOR = 0
replace ODOR = 1 if IncidentNature == "ODOR"
gen DUST = 0
replace DUST = 1 if IncidentNature == "DUST"
gen SMOKE = 0
replace SMOKE = 1 if IncidentNature == "SMOKE"
gen OUTDOOR_BURNING = 0
replace OUTDOOR_BURNING = 1 if IncidentNature == "OUTDOOR BURNING"
gen HEALTH = 0
replace HEALTH = 1 if strpos(IncidentEffect,"HEALTH")
gen PROPERTY = 0
replace PROPERTY = 1 if strpos(IncidentEffect, "PROPERTY")
gen OIL_GAS_SHALE = 0
replace OIL_GAS_SHALE = 1 if strpos(IncidentEffect, "OIL AND GAS") | ///
strpos(IncidentEffect, "EAGLE FORD SHALE") | strpos(IncidentEffect, "BARNETT SHALE")
gen OTHER = 0
replace OTHER = 1 if HEALTH == 0 & PROPERTY == 0 & OIL_GAS_SHALE == 0
drop if IncidentStatus=="REFERRED"
keep if incident_air == 1
//restrict to 2003 to 2019
keep if mdate>=tm(2003m1) & mdate<=tm(2019m12)
gen air_incident=1

//only keeping health variables
keep CIN RN_id mdate air_incident ODOR DUST SMOKE OUTDOOR_BURNING HEALTH PROPERTY OIL_GAS_SHALE OTHER
duplicates drop
isid CIN
//at this point CIN uniquely identifies observations
sort RN_id mdate
collapse (sum) ODOR DUST SMOKE OUTDOOR_BURNING HEALTH PROPERTY OIL_GAS_SHALE OTHER air_incident, by(RN_id mdate)
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/Panel_inv_nov_noe_inc.dta"
unique RN_id if _merge==3
drop if _merge==1
drop _merge
foreach v of varlist ODOR DUST SMOKE OUTDOOR_BURNING HEALTH PROPERTY OIL_GAS_SHALE OTHER air_incident{
	replace `v'=0 if `v'==.
	gen p_`v'=(`v'>0)
	label var p_`v' "=1 if RN had at least one `v' in month"
}
drop ODOR DUST SMOKE OUTDOOR_BURNING HEALTH PROPERTY OIL_GAS_SHALE OTHER
sort RN_id mdate

//8. add counts of other complaints by county-month, region-month, and area-month
//county
gen county_missing=1 if County==""
label var county_missing "=1 if County was missing in facilities data"
rename County county
sort county mdate
merge m:1 county mdate using "$processed_data/county complaints.dta"
replace county_complaint=0 if _merge==1
drop _merge
replace county_complaint=. if county_missing==1
gen other_complaint=county_complaint-incident
label var other_complaint "# of complaints against OTHER RNs in county-month"
sort RN_id mdate
sort county mdate
merge m:1 county mdate using "$processed_data/county air complaints.dta"
replace county_air_complaint=0 if _merge==1
drop _merge
replace county_air_complaint=. if county_missing==1
gen other_air_complaint=county_air_complaint-air_incident
label var other_air_complaint "# of air complaints against OTHER RNs in county-month"
//region
sort region_id
merge m:1 region_id mdate using "$processed_data/region complaints.dta"
replace region_complaint=0 if _merge==1
drop _merge
//note: if county is missing, region is also missing bc both from facilities file
replace region_complaint=. if county_missing==1
gen other_complaint_region=region_complaint-incident
label var other_complaint_region "# of complaints against OTHER RNs in region-month"
sort RN_id mdate
sort region_id mdate
merge m:1 region_id mdate using "$processed_data/region air complaints.dta"
replace region_air_complaint=0 if _merge==1
drop _merge
replace region_air_complaint=. if county_missing==1
gen other_air_complaint_region=region_air_complaint-air_incident
label var other_air_complaint_region "# of air complaints against OTHER RNs in region-month"
//area
sort area_id
merge m:1 area_id mdate using "$processed_data/area complaints.dta"
replace area_complaint=0 if _merge==1
drop _merge
//note: if county is missing, area is also missing bc county and region are both from facilities file
replace area_complaint=. if county_missing==1
gen other_complaint_area=area_complaint-incident
label var other_complaint_area "# of complaints against OTHER RNs in area-month"
sort RN_id mdate
sort area_id mdate
merge m:1 area_id mdate using "$processed_data/area air complaints.dta"
replace area_air_complaint=0 if _merge==1
drop _merge
replace area_air_complaint=. if county_missing==1
gen other_air_complaint_area=area_air_complaint-air_incident
label var other_air_complaint_area "# of air complaints against OTHER RNs in area-month"
sort RN_id mdate
order year, after(mdate)
order month, after(year)
order county, after(month)
order county_missing, after(county)
order region_id, after(county_missing)
order TitleV, after(region_id)
order NSR, after(TitleV)
save "$processed_data/Panel.dta", replace

//8a. add in complaints by industry-months
//first add industry to RNs
use "$processed_data/facility_characteristics.dta", clear
keep RegulatedEntityNo SIC
rename RegulatedEntityNo RegulatedEntity
gen SIC_2digit = substr(SIC,1,2)
destring SIC_2digit, replace
replace SIC_2digit=. if SIC_2digit==99
drop if SIC_2digit==.
drop SIC 
duplicates drop
sort RegulatedEntity SIC_2digit
by RegulatedEntity: gen ind=_n
reshape wide SIC_2digit, i(RegulatedEntity) j(ind)
gen RN_id=substr(RegulatedEntity,3,.)
destring RN_id, replace
drop RegulatedEntity
sort RN_id
save "$processed_data/SIC_codes_temp.dta", replace

use "$processed_data/Panel.dta", clear
sort RN_id
merge m:1 RN_id using "$processed_data/SIC_codes_temp.dta"
drop if _merge==2
gen missing_industry=0
replace missing_industry=1 if _merge==1
label var missing_industry "=1 if RN_id does not have SIC codes in facility characteristics file"
unique RN_id if _merge==1
unique RN_id if _merge==3
drop _merge
save "$processed_data/Panel.dta", replace

//now merge in industry-month complaints
foreach x of numlist 1/12{
	use "$processed_data/Panel.dta", replace
	rename SIC_2digit`x' SIC_2digit
	sort SIC_2digit mdate
	merge m:1 SIC_2digit mdate using "$processed_data/industry_air_complaints.dta"
	drop if _merge==2
	rename ind_air_complaint ind_air_complaint`x'
	replace ind_air_complaint`x'=0 if ind_air_complaint`x'==. & missing_industry==0 & _merge==1
	tab ind_air_complaint`x' _merge,missing
	rename SIC_2digit SIC_2digit`x'
	keep RN_id mdate ind_air_complaint`x'
	isid RN_id mdate
	save "$processed_data/SIC_2digit`x'_panel.dta", replace
}

use "$processed_data/Panel.dta", clear
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/SIC_2digit1_panel.dta"
drop _merge
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/SIC_2digit2_panel.dta"
drop _merge
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/SIC_2digit3_panel.dta"
drop _merge
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/SIC_2digit4_panel.dta"
drop _merge
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/SIC_2digit5_panel.dta"
drop _merge
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/SIC_2digit6_panel.dta"
drop _merge
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/SIC_2digit7_panel.dta"
drop _merge
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/SIC_2digit8_panel.dta"
drop _merge
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/SIC_2digit9_panel.dta"
drop _merge
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/SIC_2digit10_panel.dta"
drop _merge
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/SIC_2digit11_panel.dta"
drop _merge
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/SIC_2digit12_panel.dta"
drop _merge
egen ind_air_complaint=rowtotal(ind_air_complaint*)
replace ind_air_complaint=ind_air_complaint-air_incident
replace ind_air_complaint=. if missing_industry==1
label var ind_air_complaint "total complaints against other facilities in the same industry/industries-month"
gen single_industry=.
replace single_industry=0 if SIC_2digit1!=. & SIC_2digit2!=. & missing_industry==0
replace single_industry=1 if SIC_2digit1!=. & SIC_2digit2==. & missing_industry==0
label var single_industry "=1 if SIC code is non-misisng with only one 2-digit code"
save "$processed_data/Panel.dta", replace

//9. create a variable that identifies RNs never investigated bt 2003 & 2019
use "$processed_data/Panel.dta", clear
keep RN_id day_air_inv
sort RN_id
collapse (sum) day_air_inv, by(RN_id)
gen never_air_inv=0
replace never_air_inv=1 if day_air_inv==0
label var never_air_inv "=1 if RN never has air investigation bt 2003 & 2019"
sort RN_id
save "$processed_data/temp.dta", replace
use "$processed_data/Panel.dta"
merge m:1 RN_id using "$processed_data/temp.dta"
drop _merge
order never_air_inv, after(NSR)
save "$processed_data/Panel.dta", replace

//10. add in air emission events
use "$processed_data/Emissions_events.dta", clear
gen RN_id=substr(RegulatedEntityNo,3,.)
label var RN_id "same as RN without 'RN'"
destring RN_id, replace
rename IncidentNo EEIN
gen temp = date(IncidentStartDate,"MDY")
drop IncidentStartDate 
rename temp IncidentStartDate
format IncidentStartDate %td
replace IncidentStartDate = . if IncidentStartDate < 0
gen year=year(IncidentStartDate)
gen month=month(IncidentStartDate)
gen mdate=ym(year,month)
format mdate %tm
//restrict to 2003 to 2019
keep if mdate>=tm(2003m1) & mdate<=tm(2019m12)
gen ee_incident=1
keep EEIN RN_id mdate ee_incident
duplicates drop
isid EEIN
//at this point EEIN uniquely identifies observations
sort RN_id mdate
//this will give us # of emission events in the month
collapse (sum) ee_incident, by(RN_id mdate)
label var ee_incident "# of emissions events for RN in month"
sort RN_id mdate
merge 1:1 RN_id mdate using "$processed_data/Panel.dta"
unique RN_id if _merge==3
drop if _merge==1
drop _merge
replace ee_incident=0 if ee_incident==.
gen p_ee_incident=(ee_incident>0)
label var p_ee_incident "=1 if RN had at least one emission event incident in month"
sort RN_id mdate
save "$processed_data/Air_Panel.dta", replace

erase "$processed_data/Panel.dta"
erase "$processed_data/Panel_inv_nov_noe_inc.dta"
erase "$processed_data/Panel_inv_nov_noe.dta"
erase "$processed_data/Panel_inv_nov.dta"
erase "$processed_data/Panel_inv.dta"
erase "$processed_data/facilities_temp.dta"
erase "$processed_data/temp.dta"